set.seed(1000)
library(pacman)
p_load("data.table", "lfe", "Formula", "interplot", "stargazer")

# Set wd to file location
data <- fread("data.csv")

##################################################
# Figure in Appendix 2: Outliers in Election Data
##################################################
  par(mfrow = c(1, 2), mar = c(3.5, 3.5, 1, 1), mgp = c(2, 1, 0))
  plot(data$turnout_2013, data$turnout_2016, pch = 16, col = "grey", xlab = "Turnout in 2013", ylab = "Turnout in 2016", main = paste("Full data (N = ", nrow(data) ,")", sep = ""), xlim = c(20, 110), ylim = c(20, 110))
  abline(0, 1)
  w <- which(data$turnout_2016 > 99)
  points(data$turnout_2013[w], data$turnout_2016[w], pch = 16, col = "red")
  data <- subset(data, turnout_2016 < 99)
  plot(data$turnout_2013, data$turnout_2016, xlab = "Turnout in 2013", ylab = "Turnout in 2016", pch = 16, col = "grey", main = paste("Outliers removed (N = ", nrow(data) ,")", sep = ""))
  abline(0, 1)

# The data frame after the two outliers removed:
data <- data[turnout_2016 < 99]

##################################################
# Figure in Appendix 3: Power calculations
##################################################
power_calc <- function(){
  # Assign treatment
  data[, x := sample(rep(1:3, each = nrow(data)/3))]
  # Simulate turnout
  data[, y := rbinom(nrow(data), voters_2016, turnout_2013/100 + 0.02*(x %in% 2:3))/voters_2016]
  # Regression
  b <- lm(y ~ factor(x), data = data, weights = voters_2016)%>%summary()%>%coefficients()
  # Clean up
  data[, c("x", "y") := NULL]
  # Test the null hypotheses
  sapply(c(0.05, 0.1), function(z) c(b[2, 4] < z & b[3, 4] < z, b[2, 4] < z, b[2, 4] < z | b[3, 4] < z))
}

# Results
replicate(1000, power_calc())%>%apply(., 1:2, mean)

##################################################
# # Table 2 in Article: Balance tests
##################################################
m <- lm(apartments ~ treatment, data = data, weights = voters_2016, na.action = na.exclude)
bal_fit <- list(
  update(m, apartments ~ .),
  update(m, buildings ~ .),
  update(m, voters_2016 ~ .),
  update(m, turnout_2011 ~ .),
  update(m, united_russia_2011 ~ .),
  update(m, turnout_2012 ~ .),
  update(m, putin_2012 ~ .),
  update(m, turnout_2013 ~ .),
  update(m, navalny_2013 ~ .)
)

out_balance <- function(m){
y <- roundr(predict(m, newdata = data.frame(treatment = c("A", "B", "C"))))
b <- roundr(coefficients(summary(m))[2:3, c(1, 4)], 2)
b <- paste(b[,1], " (", b[,2], ")", sep = "")
c(y, b)
}

out <- cbind(c("Number of apartments", "Number of buildings", "Number of voters", "Turnout in 2011 elections", "United Russia vote in 2011 elections", "Turnout in 2012 elections", "Putin vote in 2012 elections", "Turnout in 2013 elections", "Navalny vote in 2013 elections"), t(sapply(bal_fit, out_balance)))

print(xtable(out,
             caption="Balance tests.", label = "tab:balance",
             align = c("l", "l", rep("C{2cm}", 3), rep("C{3cm}", 2))),
      sanitize.text.function = identity,
      hline.after = FALSE,
      include.rownames = FALSE,
      include.colnames = FALSE,
      caption.placement = 'bottom',
      booktabs = TRUE,
      type = "latex",
      add.to.row = list(
        pos = as.list(c(-1, 0, 0, nrow(out), nrow(out))),
        command = c("\\toprule\n", "& \\multicolumn{3}{c}{Group means} & \\multicolumn{2}{c}{Treatment -- Control} \\\\ \\cmidrule(l){2-4}\\cmidrule(l){5-6} \n", "& \\multicolumn{1}{l}{Control} & \\multicolumn{1}{c}{Pivotal} & \\multicolumn{1}{c}{Shame}  & \\multicolumn{1}{c}{Pivotal} & \\multicolumn{1}{c}{Shame}  \\\\\n", "\\bottomrule\n", paste("\\multicolumn{6}{p{\\linewidth}}{{\\footnotesize ", "Observations weighted by precinct voting population. P-values for two-tailed t-test in parentheses.}\n}", sep = "")))
    )

##################################################
# Table 3 in Article: Treatment effects
##################################################    
# WLS fits:
m <- lm(turnout_2016 ~ treatment, data = data, weights = voters_2016)
fit <- list(
  m,
  update(m, . ~  . + turnout_2013),
  update(m, galiamina_2016 ~ .),
  update(m, galiamina_2016 ~ . + turnout_2013),
  update(m, yabloko_2016 ~ .),  
  update(m, yabloko_2016 ~ . + turnout_2013)
)
# Main results using WLS
stargazer(fit, omit = c("Constant"), star.cutoffs = c(0.05, 0.01), keep.stat = c("adj.rsq", "n"), dep.var.caption = "", dep.var.labels = c("Turnout", "Candidate vote", "Party vote"), out = "itt.tex", title = "Campaign effects.", covariate.labels = c("Closeness", "Impact", "Turnout in 2013"), notes.align = "l", digits = 2, label = "tab:itt", align = TRUE, notes.append = TRUE, notes.label = "", notes = "Dependent variables are measured in percentages.")

# Calculate p-value for one-sided null hypotheses:
p_val <- function(cutoff){
g <- function(x){
b <- coefficients(summary(x))
data.table(h = paste0("$H_0: \\beta_{", c("Closeness", "Impact"), "} < ",  cutoff, "$"), p = round(pt((b[,1] - cutoff)/b[,2], df = x$df.residual, lower.tail = FALSE)[2:3], 2))
}
out <- lapply(fit, g) %>% do.call("cbind", .)
out <- out[, -c(3, 5, 7, 9, 11)]
out
}
# Add these manually to the above table
p_val(1)
p_val(2)


##################################################
# Table in Appendix 4: OLS estimates
##################################################  
fit_noweights <- lapply(fit, function(x) update(x, weights = rep(1, nrow(data))))
stargazer(fit_noweights, omit = c("Constant"), star.cutoffs = c(0.05, 0.01), keep.stat = c("adj.rsq", "n"), dep.var.caption = "", dep.var.labels = c("Turnout", "Candidate vote", "Party vote"), out = "itt_noweights.tex", title = "OLS estimates of the campaign effects.", covariate.labels = c("Closeness", "Impact", "Turnout in 2013"), notes.align = "l", digits = 2, label = "tab:itt", align = TRUE, notes.append = TRUE, notes.label = "", notes = "Dependent variables are measured in percentages.")

#####################################################
# Figure in Appendix 5 (Additional robustness checks)
#####################################################  
# Design matrix for pre-treatment covariates
X <- data.frame(data)[, unlist(lapply(bal_fit, function(x) as.character(x$terms[[2]])))]
# All combinations of X's columns
res <- Map(combn, list(1:ncol(X)), seq_along(1:ncol(X)), simplify = FALSE)
var <- unlist(res, recursive = FALSE)

lm_comb <- function(fit, x){
  X.star <<- X[, x]
  update(fit, . ~ . + as.matrix(X.star))
}

out <- lapply(fit[c(2, 4, 6)], function(z) lapply(var, lm_comb, fit = z))

outt <- function(i) {
  m1 <- do.call("rbind", lapply(out[[i]], function(x) cbind(coefficients(x)[c("treatmentB", "treatmentC")], confint(x, parm = c("treatmentB", "treatmentC")))))
  treat <- rownames(m1)
  m1 <- data.frame(m1, outcome = as.character(fit[[2*i]]$terms[[2]]))
  m1$treat <- treat
  m1
}

cov_out <- data.frame(do.call("rbind", lapply(1:3, outt)))
cov_out <- cov_out %>%
  group_by(treat, outcome) %>%
  mutate(rank = row_number(V1))
cov_out$treat <- factor(cov_out$treat, labels = c("Closeness", "Impact"))
cov_out$outcome <- factor(cov_out$outcome, labels = c("Turnout", "Candidate", "Party"))

ggplot(cov_out, aes(x=rank, y=V1)) + geom_errorbar(aes(ymin=X2.5.., ymax=X97.5..), color = "light blue") + geom_point() + facet_grid(treat ~ outcome) + theme_minimal() + theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank()) + ylab("") + geom_hline(yintercept = 0, col = "red")

#####################################################
# Table in Appendix 6 (Alternative outcomes)
#####################################################
data[, y := 100*rowSums(.SD), .SDcols = c(
                                   "Cand-Галямина Юлия Евгеньевна", 
                                   "Cand-Макаров Вячеслав Геннадьевич", 
                                   "Cand-Юрченко Юрий Петрович", 
                                   "Cand-Онищенко Иван Владимирович",
                                   "Cand-Объедков Федор Юрьевич",
                                   "Cand-Нечаев Андрей Алексеевич")]
fit_alt <- list(
  update(m, y ~  . ),
  update(m, y ~  . + turnout_2013)
)

stargazer(fit_alt, omit = c("Constant"), star.cutoffs = c(0.05, 0.01), keep.stat = c("adj.rsq", "n"), dep.var.caption = "", dep.var.labels = c("All opposition candidates"), covariate.labels = c("Closeness", "Impact", "Turnout in 2013"), notes.align = "l", digits = 2, label = "tab:all_candidates", align = TRUE, notes.append = TRUE, notes.label = "", notes = "Dependent variables are measured in percentages.")

#######################################################################
# Figure in Appendix 7 (Interactive effects)
#######################################################################
m <- lm(turnout_2016 ~ treatment*navalny_2013 + turnout_2013, data = data, weights = voters_2016)
interplot(m = m, var1 = c("treatmentB"), var2 = "navalny_2013", lwd = 2, hist = TRUE) + xlab("Navalny's voteshare in 2013") + ylab("Marginal effects") + theme_bw() + geom_hline(yintercept = 0, linetype = "dashed") + theme(strip.text.x = element_text(size = 12, face = "bold"))
interplot(m = m, var1 = c("treatmentC"), var2 = "navalny_2013", lwd = 2, hist = TRUE) +  xlab("Navalny's voteshare in 2013") + ylab("Marginal effects") + theme_bw() + geom_hline(yintercept = 0, linetype = "dashed") + theme(strip.text.x = element_text(size = 12, face = "bold"))

#################################################################
# Figure 1 in Article: Effects at different levels of saturation
#################################################################

plot.fit <- function(fit, title = "") {
  
  x <- sort(data$delivered)
  # Upper bound (else N < 100):
  x <- unique(x[x <= rev(x)[100]])
  
  f <- function(x) {
    d <- data[data$delivered >= x, ]
    out <- update(fit, data = d, weights = voters_2016)
    w <- grep("treat", names(coefficients(out)))
    data.frame(treat = c("Closeness", "Impact"), cbind(threshold = x, b = coefficients(out), confint(out))[w, ])
  }
  y <- do.call("rbind", lapply(x, f))
  
  ggplot(data = y, aes(x = threshold, y = b, ymin = X2.5.., ymax = X97.5..)) + geom_point() +  geom_errorbar(width = 0.01) + theme_bw() + theme(panel.grid.minor = element_blank(), panel.grid.major.x = element_blank(), plot.title = element_text(hjust = 0.5)) + ylab("Coefficient and 95\\% CI") + xlab("Saturation threshold") + facet_grid(. ~ treat) + ggtitle(title) 
  
}
plot.fit(fit[[2]], title = "")

#########################
# Figures in Appendix 8
#########################
plot.fit(fit[[4]], title = "Galiamina vote")
plot.fit(fit[[6]], title = "Yabloko vote")

#############################
#  Appendix 9: Election fraud
#############################
# Simulation: how much manipulation should there be to explain our results?
N <- data$voters_2016
D <- data$treatment
# Treatment effects of 2% points for each treatmen
y <- rbinom(length(N), N, 1/3 + 0.01*I(D=="B") + 0.02*I(D=="C"))
summary(lm(100*(y/N) ~ D, weights = N))
# Add extra votes randomly
fraud_sim <- function(f){
  g <- function(){
    y <- rbinom(length(N), N, 1/3 + 0.01*I(D=="B") + 0.02*I(D=="C"))
    m <- rbinom(length(N), N-y, f)
    coefficients(lm(100*((y+m)/N) ~ D, weights = N))[2:3]
  }
  out <- t(replicate(1000, g()))
  data.frame(f = rep(f, 2), var = rep(c("Closeness", "Impact"), each = nrow(out)), value = c(out[,1], out[,2]))
}

f <- c(0, 0.05, 0.25, 0.5, .75)
out <- do.call("rbind", lapply(f, fraud_sim))

ggplot(out, aes(x = factor(f), y = value)) + geom_boxplot() + theme_bw() + xlab("Turnout inflation") + facet_grid(. ~ var) + ylab("Coefficient estimates") + geom_hline(data = data.frame(var = unique(out$var), Z = c(1, 2)), aes(yintercept = Z), color = "blue") + theme(axis.text.x = element_text(face="bold", size=12)) + scale_x_discrete(labels = paste(100*f, "\\%", sep = ""))

#############################
#  END
#############################